################################################################################################
#######  Volha Charnysh. 2024. Uprooted: How post-WWII Population Transfers Remade Europe. CUP
#######  Replication for analysis in Chapter 6. County-level analysis for West Germany
######## R version 3.6.3 (2020-02-29) -- "Holding the Windsock"
######## Platform: x86_64-apple-darwin15.6.0 (64-bit)
########################################################

rm(list = ls())
library(openxlsx)
library(stargazer)
library(sandwich)
library(lmtest)
library(stringr)
library(arm) # for standardize
library(ggplot2) #for plotting
library(visreg)



cse = function(reg) {
  rob = sqrt(diag(vcovHC(reg, type = "HC1")))
  return(rob)
}
hc1<-function(x) vcovHC(x, type="HC1")  



##########################################################
########## ANALYZING RESULTS OF THE 1949 ELECTION  #######
##########################################################

dat_el<-read.xlsx("chapter6/election_data.xlsx")

#Compute variables:
dat_el$ShareExp1950<-dat_el$Heimatvertriebene1950/dat_el$Pop1950
dat_el$ShareExp1946<-dat_el$Expellees1946/dat_el$Bevolkerung1946

dat_el$ShareCath1939<-dat_el$Katholische1939/dat_el$Population1939
dat_el$ShareEvang1939<-dat_el$Evangelische1939/dat_el$Population1939
dat_el$lnPop1939<-log(dat_el$Population1939)
dat_el$ShareAgr1939<-dat_el$AgricultureInsgesamt1939/dat_el$Erwerbspersonen_Total1939 
dat_el$ShareDestroyed<-dat_el$KriegsschaedenNWohnungen/dat_el$NormalWohnungen 
dat_el$lnDistEastBorder<-log(dat_el$distDDR)
dat_el$ShareCath1950<-dat_el$Kath1950/dat_el$Pop1950
dat_el$PubServPT1950<-dat_el$PubServ_TotEmpl1950/(dat_el$Pop1950/1000)

dat_el$SPD1933<-dat_el$n333spd/dat_el$n333gs


dat_el$French<-ifelse(dat_el$Zone=="French", 1, 0)

###########################################################################
## Table 6.1. Expellee share and voting in the 1949 federal election   ####
###########################################################################

reg1<-lm(Turnout1949~ShareExp1946+ShareAgr1939+ShareDestroyed+lnDistEastBorder+Land, data=dat_el)

reg1b<-lm(Turnout1949~ShareExp1946+ShareCath1939+ShareAgr1939+ShareDestroyed+lnDistEastBorder+SPD1933+lnPop1939+Type.x+French+Land, data=dat_el)

reg2<-lm(ShareSPD1949~ShareExp1946+ShareAgr1939+ShareDestroyed+lnDistEastBorder+Land, data=dat_el)

reg2b<-lm(ShareSPD1949~ShareExp1946+ShareCath1939+ShareAgr1939+ShareDestroyed+lnDistEastBorder+SPD1933+lnPop1939+Type.x+French+Land, data=dat_el)

reg3<-lm(ShareCDU1949~ShareExp1946+ShareAgr1939+ShareDestroyed+lnDistEastBorder+Land, data=dat_el)

reg3b<-lm(ShareCDU1949~ShareExp1946+ShareCath1939+ShareAgr1939+ShareDestroyed+lnDistEastBorder+SPD1933+lnPop1939+Type.x+French+Land, data=dat_el)

reg4<-lm(ShareKPD1949~ShareExp1946+ShareCath1939+ShareAgr1939+ShareDestroyed+lnDistEastBorder+SPD1933+lnPop1939+Type.x+French+Land, data=dat_el)

ses1 <- list(coeftest(reg1, vcov = vcovHC(reg1, type="HC1"))[,2], coeftest(reg1b, vcov = vcovHC(reg1b, type="HC1"))[,2], coeftest(reg2, vcov = vcovHC(reg2, type="HC1"))[,2], coeftest(reg2b, vcov = vcovHC(reg2b, type="HC1"))[,2], coeftest(reg3, vcov = vcovHC(reg3, type="HC1"))[,2], coeftest(reg3b, vcov = vcovHC(reg3b, type="HC1"))[,2])

pvals1 <- list(coeftest(reg1, vcov = vcovHC(reg1, type="HC1"))[,4], coeftest(reg1b, vcov = vcovHC(reg1b, type="HC1"))[,4], coeftest(reg2, vcov = vcovHC(reg2, type="HC1"))[,4],  coeftest(reg2b, vcov = vcovHC(reg2b, type="HC1"))[,4], 
               coeftest(reg3, vcov = vcovHC(reg3, type="HC1"))[,4], coeftest(reg3b, vcov = vcovHC(reg3b, type="HC1"))[,4])


stargazer(reg1, reg1b, reg2, reg2b, reg3, reg3b, se = ses1, p=pvals1, digits=2, 
          covariate.labels=c("Expellee share (1946)", "Share Catholic", "Share in agriculture", "Share destroyed",  "Ln dist to the Eastern border","SPD vote in 1933", "Ln population", "City-district", "French zone"), 
          omit=c("Land", "Constant"), omit.stat=c("ser","f", "rsq"), no.space=TRUE, column.labels = c("Turnout 1949", "Turnout 1949", "SPD 1949", "SPD 1949", "CDU/CSU 1949", "CDU/CSU 1949"), 
          title="Results in the 1949 election. Heteroskedasticity-robust standard errors in parentheses.") #


#########################################################################################
######### Figure 6.1: Expellee share and voting in the 1949 federal election  ###########
#########################################################################################


reg1bs<-standardize(reg1b, unchanged="land", standardize.y=TRUE)
reg2bs<-standardize(reg2b, unchanged="land", standardize.y=TRUE)
reg3bs<-standardize(reg3b, unchanged="land", standardize.y=TRUE)


plot.outRefugees <- sapply(list(reg1bs, reg2bs,  reg3bs), function(x){
  coeftest(x, vcov.=hc1)[2,1:2] #Check this number
})



plot.outRefugees <- as.data.frame(t(plot.outRefugees))


plot.outRefugees$dv <- factor(c("Turnout 1949", "SPD 1949", "CDU 1949"), 
                          levels=c("Turnout 1949", "SPD 1949", "CDU 1949")) 
plot.outRefugees$iv <- c(rep("Expellee share",3))
plot.outRefugees$lb <- plot.outRefugees$Estimate - 1.96*plot.outRefugees$`Std. Error`
plot.outRefugees$ub <- plot.outRefugees$Estimate + 1.96*plot.outRefugees$`Std. Error`

fig.vote1949 = ggplot(
  data = plot.outRefugees,
  mapping = aes(
    x = dv,
    y = Estimate,
    shape = iv
  )
) +
  geom_hline(yintercept = 0, colour = "grey") +
  geom_pointrange(mapping = aes(ymin = lb, ymax = ub), alpha = 0.75, position = position_dodge(width = 0.1), size=1) +
  scale_y_continuous(name = "Average marginal effect") +scale_shape_manual(values = c(2, 16))+
  scale_x_discrete(name = NULL, labels = c("Turnout 1949", "SPD 1949", "CDU 1949")) +
  theme_bw() +
  theme(
    text=element_text(size=12, family="sans", color="black"),
    legend.position = "bottom",
    legend.text = element_text(size = 12,family="sans"),
    axis.text.x = element_text(angle = 47, hjust = 1, size=12, family="sans", color="black"),
    legend.title = element_blank(),
    plot.margin = unit(c(.5,.5,.5,1), "cm")
  )

ggsave("Figure6.1.jpg",fig.vote1949, width = 8, height = 6)

################################################################################################
################# Table 6.2 Expellee share and administrative capacity in 1950. ################
################################################################################################


dat<-read.xlsx("chapter6/administrative_capacity.xlsx")
dat$PubServPT1950<-dat$PubServ_TotEmpl1950/(dat$Pop1950/1000)
dat$PubServPC1950<-dat$PubServ_TotEmpl1950/dat$Pop1950

dat$PubServArea1950<-dat$PubServ_TotEmpl1950/dat$AREA50

dat$BeshPubServPC1950<-dat$BeschaeftigteOffDienst/(dat$Pop1950/1000)

dat$ArbPubServPC1950<-dat$ArbeitstaetterPubServ/(dat$Pop1950/1000)

dat$PubServPC1939<-dat$Dienst_Total1939/(dat$Population1939/1000)


dat$BeshPubServExpPC1950<-dat$BeschaeftigteVertrOffDienst/(dat$Pop1950/1000)
dat$BeshPubServNativePC1950<-(dat$BeschaeftigteOffDienst-dat$BeschaeftigteVertrOffDienst)/(dat$Pop1950/1000)

dat$ShareExpInService1950<-dat$BeschaeftigteVertrOffDienst/dat$BeschaeftigteOffDienst

dat$ShareAgric1939<-dat$AgricultureInsgesamt1939/dat$Population1939

#get diversity of expellees: 
dat$ShareExpellees1950<-dat$Heimatvertriebene1950/dat$Pop1950
dat$ShareExpellees1946<-dat$Expellees1946/dat$Bevolkerung1946


dat$ShareDestroyed<-dat$KriegsschaedenNWohnungen/dat$NormalWohnungen 

dat$lnDistEastBorder<-log(dat$distDDR)
dat$lnPop1950<-log(dat$Pop1950)

dat$ShareExpellees1946<-dat$Expellees1946/dat$Bevolkerung1946 #A lot of NAs

dat$French<-ifelse(dat$OccZone50=="FBZ", 1, 0)  


##################################################################
##### REGRESSION ANALYSIS: PUBLIC EMPLOYEES & OCCUPATIONS  #######
##################################################################

datNoNA<-subset(dat, !is.na(BeshPubServPC1950))

datNoNA$LnBeshPubServPC1950<-log(datNoNA$BeshPubServPC1950)
datNoNA$LnBeshPubServExpPC1950<-log(datNoNA$BeshPubServExpPC1950)

lm1<-lm(LnBeshPubServPC1950 ~ ShareExpellees1950+Type+French+Land, data=datNoNA) 
summary(lm1)

lm2<-lm(LnBeshPubServPC1950 ~ ShareExpellees1950+ShareDestroyed+ShareAgric1939+lnDistEastBorder+lnPop1950+Type+French+Land, data=datNoNA) 
summary(lm2)


#Here we can look at expellees: 
lm3<-lm(LnBeshPubServExpPC1950 ~ ShareExpellees1950+Type+French+Land, data=datNoNA) 
summary(lm3)

lm4<-lm(LnBeshPubServExpPC1950 ~ ShareExpellees1950+ShareDestroyed+ShareAgric1939+lnDistEastBorder+lnPop1950+Type+French+Land, data=datNoNA) 
summary(lm4)

lm4b<-lm(log(BeshPubServNativePC1950) ~ ShareExpellees1950+ShareDestroyed+ShareAgric1939+lnDistEastBorder+lnPop1950+Type+French+Land, data=datNoNA) 
summary(lm4b)

lm5<-lm(ShareExpInService1950 ~ ShareExpellees1950+Type+French+Land, data=datNoNA) 
summary(lm5)

lm6<-lm(ShareExpInService1950 ~ ShareExpellees1950+ShareDestroyed+ShareAgric1939+lnDistEastBorder+lnPop1950+Type+French+Land, data=datNoNA) 
summary(lm6)

## Putting together results in the table: 
ses_occ1 <- list(coeftest(lm1, vcov = vcovHC(lm1, type="HC1"))[,2], coeftest(lm2, vcov = vcovHC(lm2, type="HC1"))[,2], coeftest(lm3, vcov = vcovHC(lm3, type="HC1"))[,2], 
                 coeftest(lm4, vcov = vcovHC(lm4, type="HC1"))[,2], coeftest(lm5, vcov = vcovHC(lm5, type="HC1"))[,2], coeftest(lm6, vcov = vcovHC(lm6, type="HC1"))[,2])

pvals_occ1 <- list(coeftest(lm1, vcov = vcovHC(lm1, type="HC1"))[,4], coeftest(lm2, vcov = vcovHC(lm2, type="HC1"))[,4], coeftest(lm3, vcov = vcovHC(lm3, type="HC1"))[,4],
                   coeftest(lm4, vcov = vcovHC(lm4, type="HC1"))[,4], coeftest(lm5, vcov = vcovHC(lm5, type="HC1"))[,4], coeftest(lm6, vcov = vcovHC(lm6, type="HC1"))[,4])


stargazer(lm1, lm2, lm3, lm4, lm5, lm6, se = ses_occ1, p=pvals_occ1, digits=2, 
          covariate.labels=c("Share Expellees (1950)",  "Share Destroyed", "Share in Agriculture", "Ln Dist to the Eastern border", "Ln Population", "City-district", "French zone"), 
          omit=c("Land", "Constant"), omit.stat=c("ser","f", "rsq"), no.space=TRUE, 
          column.labels = c("Ln(Public Employees per 1000)", "Ln(Public Employees per 1000)", "Share expellees in public service", "Share expellees in public service", "Ln(Expellees in public service per 1000)", "Ln(Expellees in public service per 1000)"), 
          title="Employment in public service in 1950. Robust SEs in parentheses.") #


#Figure 6.2A
visreg(lm2, "ShareExpellees1950",scale = "linear", xlab="Expellee share", ylab="Public employees (per 1000)", trans=exp, fill.par=list(density = 15, angle = 90, col="black"), line.par=list(col="black"))

#Figure 6.2B
visreg(lm4, "ShareExpellees1950",scale = "linear", xlab="Expellee share", ylab="Expellee public employees (per 1000)", trans=exp, fill.par=list(density = 15, angle = 90, col="black"), line.par=list(col="black"))



##################################################################################
##### Table A.12. Alternative explanatory variable, ShareExpellees1946:    #######
##################################################################################


lm1<-lm(LnBeshPubServPC1950 ~ ShareExpellees1946+Type+French+Land, data=datNoNA) 
summary(lm1)

lm2<-lm(LnBeshPubServPC1950 ~ ShareExpellees1946+ShareDestroyed+ShareAgric1939+lnDistEastBorder+lnPop1950+Type+French+Land, data=datNoNA) 
summary(lm2)


#Here we can look at expellees: 
lm3<-lm(LnBeshPubServExpPC1950 ~ ShareExpellees1946+Type+French+Land, data=datNoNA) 
summary(lm3)

lm4<-lm(LnBeshPubServExpPC1950 ~ ShareExpellees1946+ShareDestroyed+ShareAgric1939+lnDistEastBorder+lnPop1950+Type+French+Land, data=datNoNA) 
summary(lm4)

lm4b<-lm(log(BeshPubServNativePC1950) ~ ShareExpellees1946+ShareDestroyed+ShareAgric1939+lnDistEastBorder+lnPop1950+Type+French+Land, data=datNoNA) 
summary(lm4b)

lm5<-lm(ShareExpInService1950 ~ ShareExpellees1946+Type+French+Land, data=datNoNA) 
summary(lm5)

lm6<-lm(ShareExpInService1950 ~ ShareExpellees1946+ShareDestroyed+ShareAgric1939+lnDistEastBorder+lnPop1950+Type+French+Land, data=datNoNA) 
summary(lm6)

## Putting together results in the table: 
ses_occ1 <- list(coeftest(lm1, vcov = vcovHC(lm1, type="HC1"))[,2], coeftest(lm2, vcov = vcovHC(lm2, type="HC1"))[,2], coeftest(lm3, vcov = vcovHC(lm3, type="HC1"))[,2], 
                 coeftest(lm4, vcov = vcovHC(lm4, type="HC1"))[,2], coeftest(lm5, vcov = vcovHC(lm5, type="HC1"))[,2], coeftest(lm6, vcov = vcovHC(lm6, type="HC1"))[,2])

pvals_occ1 <- list(coeftest(lm1, vcov = vcovHC(lm1, type="HC1"))[,4], coeftest(lm2, vcov = vcovHC(lm2, type="HC1"))[,4], coeftest(lm3, vcov = vcovHC(lm3, type="HC1"))[,4],
                   coeftest(lm4, vcov = vcovHC(lm4, type="HC1"))[,4], coeftest(lm5, vcov = vcovHC(lm5, type="HC1"))[,4], coeftest(lm6, vcov = vcovHC(lm6, type="HC1"))[,4])


stargazer(lm1, lm2, lm3, lm4, lm5, lm6, se = ses_occ1, p=pvals_occ1, digits=2, 
          covariate.labels=c("Share expellees (1946)",  "Share destroyed", "Share in agriculture", "Ln dist to the Eastern border", "Ln population", "City-district", "French zone"), 
          omit=c("Land", "Constant"), omit.stat=c("ser","f", "rsq"), no.space=TRUE, 
          column.labels = c("Ln(Public Employees per 1,000)", "Ln(Public Employees per 1,000)", "Share expellees in public service", "Share expellees in public service", "Ln(Expellees in public service per 1000)", "Ln(Expellees in public service per 1000)"), 
          title="Employment in public service in 1950. Robust SEs in parentheses.") 

